# Build the summary-statistics table: one row per variable, one column per
# statistic (Min, Q1, Median, Mean, Q3, Max).
summary_stats <- fishnet %>%
  # Geometry is dropped so only the plain attribute columns are summarised.
  st_drop_geometry() %>%
  dplyr::select(all_of(c(
    "count_burglaries",
    "dist_to_hotspot_outages",
    "count_vacant",
    "nn_station",
    "dist_to_streets",
    "building_density",
    "avg_building_age",
    "pct_Business_Commercial",
    "pct_Downtown",
    "pct_Industrial",
    "pct_Park_Open_Space"
  ))) %>%
  # across() names each result "<column>_<statistic>", e.g. "count_vacant_Q1".
  summarise(
    across(
      everything(),
      list(
        Min = function(x) min(x, na.rm = TRUE),
        Q1 = function(x) quantile(x, 0.25, na.rm = TRUE),
        Median = function(x) median(x, na.rm = TRUE),
        Mean = function(x) mean(x, na.rm = TRUE),
        Q3 = function(x) quantile(x, 0.75, na.rm = TRUE),
        Max = function(x) max(x, na.rm = TRUE)
      )
    )
  ) %>%
  # Split "<column>_<statistic>" back into its two parts. The column names
  # themselves contain underscores, so the pattern anchors on the known
  # statistic suffixes rather than splitting on every "_".
  pivot_longer(
    cols = everything(),
    names_to = c("Variable", "Statistic"),
    names_pattern = "(.*)_(Min|Q1|Median|Mean|Q3|Max)$"
  ) %>%
  # Spread the statistics into columns: one row per variable.
  pivot_wider(names_from = Statistic, values_from = value)
# Replace raw column names with presentation labels and flag the variables
# whose values should later be rendered as percentages.
summary_stats <- summary_stats %>%
  mutate(
    # The percent flag must be computed first: it relies on the raw column
    # names, which the next expression overwrites.
    is_percent = startsWith(Variable, "pct_") | Variable == "building_density",
    # Look each raw name up in a name -> label vector; names without an entry
    # come back as NA from the lookup and fall through to the original value.
    Variable = coalesce(
      unname(c(
        count_burglaries = "Burglary Count",
        dist_to_hotspot_outages = "Distance to Outage Hotspots (Meters)",
        count_vacant = "Vacancy Count",
        nn_station = "Distance to Nearest Station (Meters)",
        dist_to_streets = "Distance to Major Street (Meters)",
        building_density = "Building Density (Percent %)",
        avg_building_age = "Average Building Age",
        pct_Business_Commercial = "% Business/Commercial",
        pct_Downtown = "% Downtown",
        pct_Industrial = "% Industrial",
        pct_Park_Open_Space = "% Park/Open Space"
      )[Variable]),
      Variable
    )
  )
# Format the values in the table.
# Each statistic column is converted to a display string: percent variables
# are multiplied by 100 and suffixed with "%", distance variables are suffixed
# with "m", and all other variables are shown as plain numbers.
summary_stats <- summary_stats %>%
  mutate(
    # Flag distance variables. This relies on the display labels assigned
    # earlier containing the word "Distance".
    is_distance = grepl("Distance", Variable),
    across(
      c(Min, Q1, Median, Mean, Q3, Max),
      # case_when() evaluates every branch on the whole column and format()
      # pads a vector to a common width, so without trim = TRUE the strings
      # carry leading spaces sized by the largest value in the column.
      # nsmall = 2 keeps two decimals everywhere instead of letting the number
      # of decimals depend on the other values in the same column.
      ~case_when(
        # NOTE(review): assumes percent variables are stored as proportions
        # (0-1), hence the scaling by 100 -- confirm against the source data.
        is_percent ~ paste0(
          format(round(. * 100, 2), big.mark = ",", nsmall = 2, trim = TRUE),
          "%"
        ),
        is_distance ~ paste0(
          format(round(., 2), big.mark = ",", nsmall = 2, trim = TRUE),
          "m"
        ),
        TRUE ~ format(round(., 2), big.mark = ",", nsmall = 2, trim = TRUE)
      )
    )
  ) %>%
  # Remove the flag columns so they don't show up in the Kable.
  dplyr::select(-is_percent, -is_distance)
# Create Kable.
# Render the formatted summary table as a styled HTML table.
summary_stats %>%
  # Bold the variable names. The format is pinned to "html" so the generated
  # markup always matches the HTML table below instead of being auto-detected
  # from session options or the knit target.
  mutate(Variable = cell_spec(Variable, format = "html", bold = TRUE)) %>%
  kable(
    format = "html", # HTML formatting for visualization flexibility.
    escape = FALSE, # Keep the cell_spec() markup from being escaped.
    caption = "Summary Statistics of Final Variables",
    col.names = c(
      "Variable", "Minimum", "1st Quartile", "Median",
      "Mean", "3rd Quartile", "Maximum"
    ),
    # Left-align the variable column, center-align the six statistic columns.
    align = c("l", rep("c", 6))
  ) %>%
  kable_styling(
    # Make the large Kable visually compact.
    bootstrap_options = c("striped", "hover", "condensed"),
    full_width = FALSE,
    position = "center"
  ) %>%
  # Style the header row (row 0).
  row_spec(0, bold = TRUE, color = "white", background = "#21918c", align = "c")